library(tidyverse)
library(plotrix) 
library(stargazer)
library(ggplot2)
library(ggeffects)

cases_all <- read.csv(file="benefit_of_a_video.csv",row.names = 1)

#Reorder factor levels

cases_all$group <- fct_relevel(cases_all$group, "survey","surveyvideo","videosurvey","video")
cases_all$sex <- fct_relevel(cases_all$sex, "Male","Female")
cases_all$education <- fct_relevel(cases_all$education, "primary","lower secondary","higher secondary","tertiary")

# Response rate and break-off rate summary statistics ########################

#summarise overall data and calculate contact rates and breakoff rates
df0<-cases_all[,c("started", "completed_quest")]
grouped_df0<-summarise_all(df0,funs(mean=mean,std_error=std.error,n=sum(!is.na(.))),na.rm = TRUE)
summarised_df0<-grouped_df0[1,c(1,3,2,4)]
summarised_df0[2,c(1,3)]<-grouped_df0[1,c(5,6)]
summarised_df0[1,]<-summarised_df0[1,]*100

#summarise grouped data and calculate mean and standard error
df1<- subset(cases_all, group %in% c("survey", "surveyvideo", "videosurvey", "video"))
df1<- df1[,c("started", "group")]
grouped_df1<-group_by(df1,group)
summarised_df1<-summarise_all(grouped_df1,funs(mean_click_rate=mean,std_error_click_rate=std.error))

df2<- subset(cases_all, group %in% c("survey", "surveyvideo", "videosurvey", "video"))
df2<- df2[,c("completed_quest", "group")]
grouped_df2<-group_by(df2,group)
summarised_df2<-summarise_all(grouped_df2,funs(mean_response_rate=mean,std_error_response_rate=std.error), na.rm = TRUE)

dfxx <- bind_cols(summarised_df1[1],summarised_df1[2:3]*100, summarised_df2[,2:3]*100)

x <- c("mean_click_rate","std_error_click_rate","mean_response_rate", "std_error_response_rate")

dfxx2<- dfxx %>%
   gather(var, value, -group) %>% 
   spread(group, value) %>% select( "var","survey", "surveyvideo", "videosurvey", "video") %>% slice(match(x, var)) 

summarised_df0_t<-t(summarised_df0)
colnames(summarised_df0_t)<-c("Total","n")
summarised_df0_t<-as_tibble(summarised_df0_t)
summary <- bind_cols(dfxx2,summarised_df0_t)

tableX <- summary %>% slice(1:4) %>% mutate_if(is.numeric, round, 2) %>% replace(., is.na(.), "-")

#FIGURE 1---------------------------
summarised_df1_fig1<-summarise_all(grouped_df1,funs(mean=mean,std_error=std.error))
summarised_df2_fig1<-summarise_all(grouped_df2,funs(mean=mean,std_error=std.error), na.rm = TRUE)

summarised_df1_fig1<-summarised_df1_fig1 %>% add_column(rate="click")
summarised_df2_fig1<-summarised_df2_fig1 %>% add_column(rate="response")

dfxx_fig1 <- bind_rows(summarised_df1_fig1,summarised_df2_fig1)

dfxx_fig1 <- dfxx_fig1 %>%  mutate_if(is.numeric,funs(.*100)) %>% filter(!is.na(mean))

tiff('figure1.tiff', units="in", width=8, height=5, res=150)
ggplot(dfxx_fig1, aes(x=group, y=mean, fill=rate)) + 
  geom_col(position=position_dodge2(preserve = "single")) +
  geom_errorbar(aes(ymin=mean-(1.96*std_error), ymax=mean+(1.96*std_error)),
                position=position_dodge2(width = .9,preserve = "single",padding = .7)) +scale_fill_brewer(palette="Paired",name="Type", labels=c("Click rate","Response rate")) +
  xlab("Experimental group") +
  ylab("Rate [%]") +
  scale_x_discrete(labels = c('Quest.', 'Quest.-Video', 'Video-Quest.','Video'))+
  theme_bw(base_size = 14)
dev.off()
# end Figure 1---------------------

#Tests for click rate differences between treatment groups #######

cases_all$started<- factor(cases_all$started) 

#test for difference in click rates between treatment groups
chisqtest_survey_contact1 <- subset(cases_all, group %in% c("survey","surveyvideo"))
chisq.test(chisqtest_survey_contact1$started, chisqtest_survey_contact1$group)

chisqtest_survey_contact2 <- subset(cases_all, group %in% c("videosurvey","surveyvideo"))
chisq.test(chisqtest_survey_contact2$started, chisqtest_survey_contact2$group)

#test for differences in jap_help between Surveyvideo and Videosurvey
ttest_survey_contact <- subset(cases_all, group %in% c("surveyvideo","videosurvey"))
t.test(ttest_survey_contact$jap_help[!is.na(ttest_survey_contact$jap_help)]~ttest_survey_contact$group[!is.na(ttest_survey_contact$jap_help)],alternative = "greater") 


#Response Rates by subgroup########################################## Appendix C Table 1 

df0<- cases_all  %>% select(group, sex, education, citizen, unemployed_at_invitation, started)

dfxxx <- df0 %>% mutate(rn = row_number(),value = 1) %>%
  pivot_wider(names_from = education, values_from = value, values_fill = list(value = 0)) %>%
  mutate_at(c("started"),funs(as.numeric(.)-1))
dfxxx$primary<-as.factor(dfxxx$primary)
levels(dfxxx$primary) <- c("nprimary","primary")
dfxxx$`lower secondary`<-as.factor(dfxxx$`lower secondary`)
levels(dfxxx$`lower secondary`) <- c("nlower secondary","lower secondary")
dfxxx$`higher secondary`<-as.factor(dfxxx$`higher secondary`)
levels(dfxxx$`higher secondary`) <- c("nhigher secondary","higher secondary")
dfxxx$tertiary<-as.factor(dfxxx$tertiary)
levels(dfxxx$tertiary) <- c("ntertiary","tertiary")

dfxxx1 <- dfxxx %>% select(-c("rn","NA")) %>% filter(!is.na(unemployed_at_invitation)) %>% group_by(group, unemployed_at_invitation) %>%
  summarise_at("started",funs(click_rate=mean,std_error=std.error)) %>%  rename("variable"=unemployed_at_invitation)
dfxxx2 <- dfxxx %>% select(-c("rn","NA")) %>% group_by(group, sex) %>%
  summarise_at("started",funs(click_rate=mean,std_error=std.error)) %>%  rename("variable"=sex)
dfxxx3 <- dfxxx %>% select(-c("rn","NA")) %>% group_by(group, citizen) %>%
  summarise_at("started",funs(click_rate=mean,std_error=std.error)) %>%  rename("variable"=citizen)

dfxxx4 <- dfxxx %>% select(-c("rn","NA")) %>% group_by(group, primary) %>%
  summarise_at("started",funs(click_rate=mean,std_error=std.error)) %>%  rename("variable"=primary)
dfxxx5 <- dfxxx %>% select(-c("rn","NA")) %>% group_by(group, `lower secondary`) %>%
  summarise_at("started",funs(click_rate=mean,std_error=std.error)) %>%  rename("variable"=`lower secondary`)
dfxxx6 <- dfxxx %>% select(-c("rn","NA")) %>% group_by(group, `higher secondary`) %>%
  summarise_at("started",funs(click_rate=mean,std_error=std.error)) %>%  rename("variable"=`higher secondary`)
dfxxx7 <- dfxxx %>% select(-c("rn","NA")) %>% group_by(group, tertiary) %>%
  summarise_at("started",funs(click_rate=mean,std_error=std.error)) %>%  rename("variable"=tertiary)

dfxxxsum <- bind_rows(dfxxx1,dfxxx2,dfxxx3,dfxxx4,dfxxx5,dfxxx6,dfxxx7)


dfxxxsum <- dfxxxsum%>% mutate_at(c("click_rate","std_error"),funs(.*100)) %>% mutate_at(c("click_rate","std_error"), round, 2)


appendixc_table1<- dfxxxsum %>% select(-c("std_error")) %>% spread(group, click_rate) %>% 
  filter(!(variable %in% c("nprimary","nlower secondary","nhigher secondary","ntertiary"))) %>%
  arrange (factor(.$variable,levels =c("Female","Male","Austrian","non-Austrian","unemployed","employed","primary","lower secondary","higher secondary","tertiary")))

#total
dfxxx1 <- dfxxx %>% select(-c("rn","NA")) %>% filter(!is.na(unemployed_at_invitation)) %>% group_by(unemployed_at_invitation) %>%
  summarise_at("started",funs(Total=mean,n=sum(!is.na(.)))) %>%  rename("variable"=unemployed_at_invitation)
dfxxx2 <- dfxxx %>% select(-c("rn","NA")) %>% group_by(sex) %>%
  summarise_at("started",funs(Total=mean,n=sum(!is.na(.)))) %>%  rename("variable"=sex)
dfxxx3 <- dfxxx %>% select(-c("rn","NA")) %>% group_by(citizen) %>%
  summarise_at("started",funs(Total=mean,n=sum(!is.na(.)))) %>%  rename("variable"=citizen)

dfxxx4 <- dfxxx %>% select(-c("rn","NA")) %>% group_by(primary) %>%
  summarise_at("started",funs(Total=mean,n=sum(!is.na(.)))) %>%  rename("variable"=primary)
dfxxx5 <- dfxxx %>% select(-c("rn","NA")) %>% group_by(`lower secondary`) %>%
  summarise_at("started",funs(Total=mean,n=sum(!is.na(.)))) %>%  rename("variable"=`lower secondary`)
dfxxx6 <- dfxxx %>% select(-c("rn","NA")) %>% group_by(`higher secondary`) %>%
  summarise_at("started",funs(Total=mean,n=sum(!is.na(.)))) %>%  rename("variable"=`higher secondary`)
dfxxx7 <- dfxxx %>% select(-c("rn","NA")) %>% group_by(tertiary) %>%
  summarise_at("started",funs(Total=mean,n=sum(!is.na(.)))) %>%  rename("variable"=tertiary)

dfxxxsum <- bind_rows(dfxxx1,dfxxx2,dfxxx3,dfxxx4,dfxxx5,dfxxx6,dfxxx7)
dfxxxsum <- dfxxxsum%>% mutate_at(c("Total"),funs(.*100)) %>% mutate_at(c("Total"), round, 2)

appendixc_table1_total<- dfxxxsum  %>% 
  filter(!(variable %in% c("nprimary","nlower secondary","nhigher secondary","ntertiary"))) %>%
  arrange (factor(.$variable,levels =c("Female","Male","Austrian","non-Austrian","unemployed","employed","primary","lower secondary","higher secondary","tertiary")))

appendixc_table1<-cbind(appendixc_table1,appendixc_table1_total[,2:3])
totalclick <- tableX[1,]
totalclick <- totalclick %>%  rename("variable"=var) 
appendixc_table1<-rbind(appendixc_table1, totalclick)
stargazer(appendixc_table1, type = "html", summary = FALSE, rownames = TRUE, out="appendixc_table1.doc")

#Response Rates by subgroup########################################## Appendix C Table 2 

df0<- cases_all  %>% select(group, sex, education, citizen, unemployed_at_invitation, completed_quest) %>% filter(group!="video")

dfxxx <- df0 %>% mutate(rn = row_number(),value = 1) %>%
  pivot_wider(names_from = education, values_from = value, values_fill = list(value = 0))
dfxxx$primary<-as.factor(dfxxx$primary)
levels(dfxxx$primary) <- c("nprimary","primary")
dfxxx$`lower secondary`<-as.factor(dfxxx$`lower secondary`)
levels(dfxxx$`lower secondary`) <- c("nlower secondary","lower secondary")
dfxxx$`higher secondary`<-as.factor(dfxxx$`higher secondary`)
levels(dfxxx$`higher secondary`) <- c("nhigher secondary","higher secondary")
dfxxx$tertiary<-as.factor(dfxxx$tertiary)
levels(dfxxx$tertiary) <- c("ntertiary","tertiary")

dfxxx1 <- dfxxx %>% select(-c("rn","NA")) %>% filter(!is.na(unemployed_at_invitation)) %>% group_by(group, unemployed_at_invitation) %>%
  summarise_at("completed_quest",funs(response_rate=mean,std_error=std.error)) %>%  rename("variable"=unemployed_at_invitation)
dfxxx2 <- dfxxx %>% select(-c("rn","NA")) %>% group_by(group, sex) %>%
  summarise_at("completed_quest",funs(response_rate=mean,std_error=std.error)) %>%  rename("variable"=sex)
dfxxx3 <- dfxxx %>% select(-c("rn","NA")) %>% group_by(group, citizen) %>%
  summarise_at("completed_quest",funs(response_rate=mean,std_error=std.error)) %>%  rename("variable"=citizen)

dfxxx4 <- dfxxx %>% select(-c("rn","NA")) %>% group_by(group, primary) %>%
  summarise_at("completed_quest",funs(response_rate=mean,std_error=std.error)) %>%  rename("variable"=primary)
dfxxx5 <- dfxxx %>% select(-c("rn","NA")) %>% group_by(group, `lower secondary`) %>%
  summarise_at("completed_quest",funs(response_rate=mean,std_error=std.error)) %>%  rename("variable"=`lower secondary`)
dfxxx6 <- dfxxx %>% select(-c("rn","NA")) %>% group_by(group, `higher secondary`) %>%
  summarise_at("completed_quest",funs(response_rate=mean,std_error=std.error)) %>%  rename("variable"=`higher secondary`)
dfxxx7 <- dfxxx %>% select(-c("rn","NA")) %>% group_by(group, tertiary) %>%
  summarise_at("completed_quest",funs(response_rate=mean,std_error=std.error)) %>%  rename("variable"=tertiary)

dfxxxsum <- bind_rows(dfxxx1,dfxxx2,dfxxx3,dfxxx4,dfxxx5,dfxxx6,dfxxx7)


dfxxxsum <- dfxxxsum%>% mutate_at(c("response_rate","std_error"),funs(.*100)) %>% mutate_at(c("response_rate","std_error"), round, 2)


appendixc_table2<- dfxxxsum %>% select(-c("std_error")) %>% spread(group, response_rate) %>% 
  filter(!(variable %in% c("nprimary","nlower secondary","nhigher secondary","ntertiary"))) %>%
  arrange (factor(.$variable,levels =c("Female","Male","Austrian","non-Austrian","unemployed","employed","primary","lower secondary","higher secondary","tertiary")))

#total
dfxxx1 <- dfxxx %>% select(-c("rn","NA")) %>% filter(!is.na(unemployed_at_invitation)) %>% group_by(unemployed_at_invitation) %>%
  summarise_at("completed_quest",funs(Total=mean,n=sum(!is.na(.)))) %>%  rename("variable"=unemployed_at_invitation)
dfxxx2 <- dfxxx %>% select(-c("rn","NA")) %>% group_by(sex) %>%
  summarise_at("completed_quest",funs(Total=mean,n=sum(!is.na(.)))) %>%  rename("variable"=sex)
dfxxx3 <- dfxxx %>% select(-c("rn","NA")) %>% group_by(citizen) %>%
  summarise_at("completed_quest",funs(Total=mean,n=sum(!is.na(.)))) %>%  rename("variable"=citizen)

dfxxx4 <- dfxxx %>% select(-c("rn","NA")) %>% group_by(primary) %>%
  summarise_at("completed_quest",funs(Total=mean,n=sum(!is.na(.)))) %>%  rename("variable"=primary)
dfxxx5 <- dfxxx %>% select(-c("rn","NA")) %>% group_by(`lower secondary`) %>%
  summarise_at("completed_quest",funs(Total=mean,n=sum(!is.na(.)))) %>%  rename("variable"=`lower secondary`)
dfxxx6 <- dfxxx %>% select(-c("rn","NA")) %>% group_by(`higher secondary`) %>%
  summarise_at("completed_quest",funs(Total=mean,n=sum(!is.na(.)))) %>%  rename("variable"=`higher secondary`)
dfxxx7 <- dfxxx %>% select(-c("rn","NA")) %>% group_by(tertiary) %>%
  summarise_at("completed_quest",funs(Total=mean,n=sum(!is.na(.)))) %>%  rename("variable"=tertiary)

dfxxxsum <- bind_rows(dfxxx1,dfxxx2,dfxxx3,dfxxx4,dfxxx5,dfxxx6,dfxxx7)
dfxxxsum <- dfxxxsum%>% mutate_at(c("Total"),funs(.*100)) %>% mutate_at(c("Total"), round, 2)

appendixc_table2_total<- dfxxxsum  %>% 
  filter(!(variable %in% c("nprimary","nlower secondary","nhigher secondary","ntertiary"))) %>%
  arrange (factor(.$variable,levels =c("Female","Male","Austrian","non-Austrian","unemployed","employed","primary","lower secondary","higher secondary","tertiary")))

appendixc_table2<-cbind(appendixc_table2,appendixc_table2_total[,2:3])
totalclick <- tableX[3,]
totalclick$video<-NULL
totalclick <- totalclick %>%  rename("variable"=var) 
appendixc_table2<-rbind(appendixc_table2, totalclick)
stargazer(appendixc_table2, type = "html", summary = FALSE, rownames = FALSE, out="appendixc_table2.doc")

####################################################################################TABLE 1

#Absolute Relative Nonclick Bias##########################################

#difference between the estimated mean (or proportion) derived from respondents 
#and the corresponding statistic derived from the full sample assigned to a given mode sequence 

# mean derived from full sample assigned to a given mode sequence
fulls_df0<- cases_all %>% select(unemployed_at_invitation, sex,citizen, education, group)

fulls_dfxxx <- fulls_df0 %>% mutate(rn = row_number(),value = 1) %>%
  pivot_wider(names_from = education, values_from = value, values_fill = list(value = 0)) %>%
  mutate_at(c("unemployed_at_invitation","sex", "citizen"),funs(as.numeric(.)-1))

fulls_dfxxx2 <- fulls_dfxxx %>% select(-c("rn","NA")) %>% group_by(group) %>% summarise_all(funs(mean(., na.rm = TRUE))) %>% mutate_if(is.numeric, funs(.*100))

fulls_dfxxx2 = fulls_dfxxx2 %>% select(group:citizen, primary, `lower secondary`, `higher secondary`, tertiary)

#estimated mean (or proportion) derived from respondents
estm_df0<- cases_all %>% filter(started==TRUE) %>% select(unemployed_at_invitation, sex,citizen, education, group)

estm_dfxxx <- estm_df0 %>% mutate(rn = row_number(),value = 1) %>%
  pivot_wider(names_from = education, values_from = value, values_fill = list(value = 0)) %>%
  mutate_at(c("unemployed_at_invitation","sex", "citizen"),funs(as.numeric(.)-1))

estm_dfxxx2 <- estm_dfxxx %>% select(-c("rn","NA")) %>% group_by(group) %>% summarise_all(funs(mean(., na.rm = TRUE))) %>% mutate_if(is.numeric, funs(.*100))

estm_dfxxx2 = estm_dfxxx2 %>% select(group:citizen, primary, `lower secondary`, `higher secondary`, tertiary)


#difference between the estimated mean and full sample mean

noncontactbias <- estm_dfxxx2 -fulls_dfxxx2

#The absolute relative non-response bias is expressed as the absolute ratio of
#the direct nonresponse bias expression (Equation 1) and the full sample estimate

relnoncontactbias <- round(((noncontactbias / fulls_dfxxx2)*100),2)

absrelnoncontactbias <- round(sqrt(((noncontactbias / fulls_dfxxx2)*100)^2),2)

relnoncontactbias$group <- estm_dfxxx2$group
absrelnoncontactbias$group <- estm_dfxxx2$group

absrelnoncontactbias<- absrelnoncontactbias %>%
  gather(var, value, -group) %>% 
  spread(group, value)

relnoncontactbias<- relnoncontactbias %>%
  gather(var, value, -group) %>% 
  spread(group, value)
 
relnoncontactbias<- relnoncontactbias %>% arrange (factor(.$var,levels =c("sex","citizen","unemployed_at_invitation","primary","lower secondary","higher secondary","tertiary")))

avg_absrelnoncontactbias <- absrelnoncontactbias %>% summarise_all(funs(mean))

avg_absrelnoncontactbias$var<-"Avg. ARNB"
table1<-rbind(relnoncontactbias,avg_absrelnoncontactbias)

n_relnoncontactbias<-fulls_dfxxx %>% summarise_all(funs(sum(!is.na(.)))) %>% select(-c("rn","NA"))
n_relnoncontactbias$primary <- sum(!is.na(fulls_df0$education))
n_relnoncontactbias$`lower secondary` <- sum(!is.na(fulls_df0$education))
n_relnoncontactbias$`higher secondary` <- sum(!is.na(fulls_df0$education))
n_relnoncontactbias$tertiary <- sum(!is.na(fulls_df0$education))

n_relnoncontactbias<- n_relnoncontactbias %>%
  gather(var, value, -group) %>% 
  spread(group, value) %>%  rename("n"=`28000`)

n_relnoncontactbias<- n_relnoncontactbias %>% arrange (factor(.$var,levels =c("sex","citizen","unemployed_at_invitation","primary","lower secondary","higher secondary","tertiary")))
n_relnoncontactbias[8,2]<-""

table1<-cbind(table1,n_relnoncontactbias[,2])

table1 <- table1  %>% mutate_if(is.numeric, round, 2) %>% replace(., is.na(.), "-")
stargazer(table1, type = "html", summary = FALSE, rownames = FALSE, out="Table1.doc")

####################################################################################TABLE 2

#Absolute Relative Nonresponse Bias ##########################################

#difference between the estimated mean (or proportion) derived from respondents
#and the corresponding statistic derived from the full sample assigned to a given mode sequence 

# mean derived from full sample assigned to a given mode sequence
fulls_df0<- cases_all %>% select(unemployed_at_invitation, sex,citizen, education, group) %>% filter(group!="video")

fulls_dfxxx <- fulls_df0 %>% mutate(rn = row_number(),value = 1) %>%
  pivot_wider(names_from = education, values_from = value, values_fill = list(value = 0)) %>%
  mutate_at(c("unemployed_at_invitation","sex", "citizen"),funs(as.numeric(.)-1))

fulls_dfxxx2 <- fulls_dfxxx %>% select(-c("rn","NA")) %>% group_by(group) %>% summarise_all(funs(mean(., na.rm = TRUE))) %>% mutate_if(is.numeric, funs(.*100))

fulls_dfxxx2 = fulls_dfxxx2 %>% select(group:citizen, primary, `lower secondary`, `higher secondary`, tertiary)

#estimated mean (or proportion) derived from respondents
estm_df0<- cases_all %>% filter(completed_quest==TRUE) %>% select(unemployed_at_invitation, sex,citizen, education, group)

estm_dfxxx <- estm_df0 %>% mutate(rn = row_number(),value = 1) %>%
  pivot_wider(names_from = education, values_from = value, values_fill = list(value = 0)) %>%
  mutate_at(c("unemployed_at_invitation","sex", "citizen"),funs(as.numeric(.)-1))

estm_dfxxx2 <- estm_dfxxx %>% select(-c("rn","NA")) %>% group_by(group) %>% summarise_all(funs(mean(., na.rm = TRUE))) %>% mutate_if(is.numeric, funs(.*100))

estm_dfxxx2 = estm_dfxxx2 %>% select(group:citizen, primary, `lower secondary`, `higher secondary`, tertiary)


#difference between the estimated mean and full sample mean

nonrespbias <- estm_dfxxx2-fulls_dfxxx2

#The absolute relative non-response bias is expressed as the absolute ratio of
#the direct nonresponse bias expression (Equation 1) and the full sample estimate

relnonrespbias <- round(((nonrespbias / fulls_dfxxx2)*100),2)

absrelnonrespbias <- round(sqrt(((nonrespbias / fulls_dfxxx2)*100)^2),2)

absrelnonrespbias$group <- estm_dfxxx2$group

relnonrespbias$group <- estm_dfxxx2$group

absrelnonrespbias<- absrelnonrespbias %>%
  gather(var, value, -group) %>% 
  spread(group, value)

relnonrespbias<- relnonrespbias %>%
  gather(var, value, -group) %>% 
  spread(group, value)

relnonrespbias<- relnonrespbias %>% arrange (factor(.$var,levels =c("sex","citizen","unemployed_at_invitation","primary","lower secondary","higher secondary","tertiary")))

avg_absrelnoncontactbias <- absrelnonrespbias %>% summarise_all(funs(mean))

avg_absrelnoncontactbias$var<-"Avg. ARNB"
table2<-rbind(relnonrespbias,avg_absrelnoncontactbias)

n_relnonrespbias<-fulls_dfxxx %>% summarise_all(funs(sum(!is.na(.)))) %>% select(-c("rn","NA"))
n_relnonrespbias$primary <- sum(!is.na(fulls_df0$education))
n_relnonrespbias$`lower secondary` <- sum(!is.na(fulls_df0$education))
n_relnonrespbias$`higher secondary` <- sum(!is.na(fulls_df0$education))
n_relnonrespbias$tertiary <- sum(!is.na(fulls_df0$education))

n_relnonrespbias<- n_relnonrespbias %>%
  gather(var, value, -group) %>% 
  spread(group, value) %>%  rename("n"=`21000`)

n_relnonrespbias<- n_relnonrespbias %>% arrange (factor(.$var,levels =c("sex","citizen","unemployed_at_invitation","primary","lower secondary","higher secondary","tertiary")))
n_relnonrespbias[8,2]<-""

table2<-cbind(table2,n_relnonrespbias[,2])

table2 <- table2  %>% mutate_if(is.numeric, round, 2) %>% replace(., is.na(.), "-")
stargazer(table2, type = "html", summary = FALSE, rownames = FALSE, out="Table2.doc")

#CLICK RATE REGRESSIONS###########################################

#Missing Values
sum(is.na(cases_all$education)) # 123 missing Education dropped from analysis (<1% of sample)
sum(is.na(cases_all$unemployed_at_invitation)) #678 missing unemployment at invitation dropped (~2% of total sample)

# Regression main effects
mylogit1 <- glm(started ~ group + sex + age + education + citizen + unemployed_at_invitation, data = cases_all, family = "binomial")
summary(mylogit1)

# Regression main effects + interactions
mylogit2 <- glm(started ~ group + sex + age + education + citizen + unemployed_at_invitation + group*sex+ group*education + group*citizen  + group*unemployed_at_invitation, data = cases_all, family = "binomial")
summary(mylogit2)

#RESPONSE RATE REGRESSIONS###########################################
cases_wo_video <- cases_all %>% filter(group!="video")

#Missing Values
sum(is.na(cases_wo_video$education)) # 88 missing Education dropped from analysis (<1% of sample)
sum(is.na(cases_wo_video$unemployed_at_invitation)) #519 missing unemployment at invitation dropped (~2% of total sample)


# Regression main effects
mylogit3 <- glm(completed_quest ~ group + sex + age + education + citizen + unemployed_at_invitation, data = cases_wo_video, family = "binomial")
summary(mylogit3)

# Regression main effects + interactions
mylogit4 <- glm(completed_quest ~ group + sex + age + education + citizen + unemployed_at_invitation + group*sex+ group*education + group*citizen  + group*unemployed_at_invitation, data = cases_wo_video, family = "binomial")
summary(mylogit4)

#BREAK OFF REGRESSIONS#############################################
#Appendix D: Full logistic regression estimates of click and response 
stargazer(mylogit1,mylogit2,mylogit3,mylogit4,dep.var.labels.include = TRUE,dep.var.labels = c("Click","Response"),  type="html",no.space=TRUE,star.cutoffs = c(0.10,0.05, 0.01, 0.001),star.char = c("+", "*", "**","***"),
          omit.stat="ll",
          covariate.labels = c("Survey-video (Ref: Survey)", "Video-survey", "Video",
                               "Female (Ref: male)", "Age", "Lower Secondary (ISCED 3) (Ref: Primary ISCED 1-2)","Higher Secondary (ISCED 4-5)","Tertiary (ISCED 6-8)","Non-Austrian (Ref. Austrian)", "Unemployed (Ref. employed)",
                               "Survey-video * female","Video-survey * female","Video * female","Survey-video * Lower Secondary","Video-survey * Lower Secondary","Video * Lower Secondary",
                               "Survey-video * Higher Secondary","Video-survey * Higher Secondary","Video * Higher Secondary","Survey-video * Tertiary","Video-survey * Tertiary","Video * Tertiary","Survey-video * non-Austrian","Video-survey * non-Austrian",
                               "Video * non-Austrian","Survey-video * Unemployed","Video-survey * Unemployed","Video * Unemployed"),
          notes= c("+p<0.1,*p<0.05,**p<0.01,***p<0.001"),
          notes.append = FALSE, out="appendix_D.doc")

####################################################################
#Figure 1: Predicted probabilities of interaction effects in Model 2 CLICK
#################################################################### EMPLOYMENT STATUS
p<-ggeffect (mylogit2, terms= c("group","unemployed_at_invitation")) 
level_order <- c('survey', 'surveyvideo', 'videosurvey','video')
level_order2 <- c('employed', 'unemployed')
p$x<-factor(p$x, level = level_order)
p$group<-factor(p$group, level = level_order2)
dataplot<-as.data.frame(p)

tiff('employment.tiff', units="in", width=8, height=5, res=150)
plot<-ggplot(data=dataplot, mapping=aes(y=predicted,x=x, colour=group)) +
  geom_point(position=position_dodge(width=0.35),  size=3) +
  geom_errorbar(aes(ymin=conf.low, ymax=conf.high),position=position_dodge(width=0.35),width=0.15,size=1) +
  theme(axis.ticks.y=element_blank(), 
        axis.title.x = element_text(vjust=-0.5,color="black"), axis.title.y = element_text(vjust=1.5, color="black"), 
        axis.text.x=element_text(size=13, color="black"),axis.text.y=element_text(size=13, color="black"), 
        axis.title=element_text(size=18,face="bold", color="black"), 
        legend.title=element_text(size=12, color="black"), 
        legend.text=element_text(size=12, color="black"),
        legend.key=element_blank(),
        panel.grid.major = element_line(size = 1, linetype = 'solid',colour="lightgrey"),
        panel.grid.minor = element_line(size = 1, linetype = 'solid',colour="lightgrey"),
        axis.line.x = element_line(size = 1, colour = "black"),
        axis.line.y = element_line(size = 1, colour = "black"),
        panel.border= element_rect(colour = 'black',fill=NA, size=1, linetype='solid'),
        panel.background = element_rect(fill=NA)) +
  scale_colour_grey(start = .7, end = .3,labels = c("Employed", "Unemployed")) +
  scale_x_discrete(labels = c('Quest.', 'Quest.-Video', 'Video-Quest.','Video')) +
  scale_y_continuous(limits=c(.03, .20),labels = scales::percent)+
  labs(x = "Treatment",y="Predicted Propabilities",colour = "Employment status",shape  = "Employment status",title="") 
plot
dev.off()
###################################################################

################################################################### GENDER
p<-ggeffect (mylogit2, terms= c("group","sex")) 
level_order <- c('survey', 'surveyvideo', 'videosurvey','video')
level_order2 <- c('Male', 'Female')
p$x<-factor(p$x, level = level_order)
p$group<-factor(p$group, level = level_order2)
dataplot<-as.data.frame(p)

tiff('gender.tiff', units="in", width=8, height=5, res=150)
plot<-ggplot(data=dataplot, mapping=aes(y=predicted,x=x, colour=group)) +
  geom_point(position=position_dodge(width=0.35),  size=3) +
  geom_errorbar(aes(ymin=conf.low, ymax=conf.high),position=position_dodge(width=0.35),width=0.15,size=1) +
  theme(axis.ticks.y=element_blank(), 
        axis.title.x = element_text(vjust=-0.5,color="black"), axis.title.y = element_text(vjust=1.5, color="black"), 
        axis.text.x=element_text(size=13, color="black"),axis.text.y=element_text(size=13, color="black"), 
        axis.title=element_text(size=18,face="bold", color="black"), 
        legend.title=element_text(size=12, color="black"), 
        legend.text=element_text(size=12, color="black"),
        legend.key=element_blank(),
        panel.grid.major = element_line(size = 1, linetype = 'solid',colour="lightgrey"),
        panel.grid.minor = element_line(size = 1, linetype = 'solid',colour="lightgrey"),
        axis.line.x = element_line(size = 1, colour = "black"),
        axis.line.y = element_line(size = 1, colour = "black"),
        panel.border= element_rect(colour = 'black',fill=NA, size=1, linetype='solid'),
        panel.background = element_rect(fill=NA)) +
  scale_colour_grey(start = .7, end = .3,labels = c("Male", "Female")) +
  scale_x_discrete(labels = c('Quest.', 'Quest.-Video', 'Video-Quest.','Video')) +
  scale_y_continuous(limits=c(.03, .20),labels = scales::percent)+
  labs(x = "Treatment",y="Predicted Propabilities",colour = "Gender",shape  = "Gender",title="") 
plot
dev.off()
###################################################################

################################################################### CITIZENSHIP
p<-ggeffect (mylogit2, terms= c("group","citizen")) 
level_order <- c('survey', 'surveyvideo', 'videosurvey','video')
level_order2 <- c('Austrian', 'non-Austrian')
p$x<-factor(p$x, level = level_order)
p$group<-factor(p$group, level = level_order2)
dataplot<-as.data.frame(p)

tiff('citizenship.tiff', units="in", width=8, height=5, res=150)
plot<-ggplot(data=dataplot, mapping=aes(y=predicted,x=x, colour=group)) +
  geom_point(position=position_dodge(width=0.35),  size=3) +
  geom_errorbar(aes(ymin=conf.low, ymax=conf.high),position=position_dodge(width=0.35),width=0.15,size=1) +
  theme(axis.ticks.y=element_blank(), 
        axis.title.x = element_text(vjust=-0.5,color="black"), axis.title.y = element_text(vjust=1.5, color="black"), 
        axis.text.x=element_text(size=13, color="black"),axis.text.y=element_text(size=13, color="black"), 
        axis.title=element_text(size=18,face="bold", color="black"), 
        legend.title=element_text(size=12, color="black"), 
        legend.text=element_text(size=12, color="black"),
        legend.key=element_blank(),
        panel.grid.major = element_line(size = 1, linetype = 'solid',colour="lightgrey"),
        panel.grid.minor = element_line(size = 1, linetype = 'solid',colour="lightgrey"),
        axis.line.x = element_line(size = 1, colour = "black"),
        axis.line.y = element_line(size = 1, colour = "black"),
        panel.border= element_rect(colour = 'black',fill=NA, size=1, linetype='solid'),
        panel.background = element_rect(fill=NA)) +
  scale_colour_grey(start = .7, end = .3,labels = c("Austrian", "non-Austrian")) +
  scale_x_discrete(labels = c('Quest.', 'Quest.-Video', 'Video-Quest.','Video')) +
  scale_y_continuous(limits=c(.03, .20),labels = scales::percent)+
  labs(x = "Treatment",y="Predicted Propabilities",colour = "Citizenship",shape  = "Citizenship",title="") 
plot
dev.off()
###################################################################

################################################################### EDUCATION
p<-ggeffect (mylogit2, terms= c("group","education")) 
level_order <- c('survey', 'surveyvideo', 'videosurvey','video')
level_order2 <- c('primary', 'lower secondary','higher secondary','tertiary')
p$x<-factor(p$x, level = level_order)
p$group<-factor(p$group, level = level_order2)
dataplot<-as.data.frame(p)

tiff('education.tiff', units="in", width=8, height=5, res=150)
plot<-ggplot(data=dataplot, mapping=aes(y=predicted,x=x, colour=group)) +
  geom_point(position=position_dodge(width=0.5),  size=3) +
  geom_errorbar(aes(ymin=conf.low, ymax=conf.high),position=position_dodge(width=0.5),width=0.3,size=1) +
  theme(axis.ticks.y=element_blank(), 
        axis.title.x = element_text(vjust=-0.5,color="black"), axis.title.y = element_text(vjust=1.5, color="black"), 
        axis.text.x=element_text(size=13, color="black"),axis.text.y=element_text(size=13, color="black"), 
        axis.title=element_text(size=18,face="bold", color="black"), 
        legend.title=element_text(size=12, color="black"), 
        legend.text=element_text(size=12, color="black"),
        legend.key=element_blank(),
        panel.grid.major = element_line(size = 1, linetype = 'solid',colour="lightgrey"),
        panel.grid.minor = element_line(size = 1, linetype = 'solid',colour="lightgrey"),
        axis.line.x = element_line(size = 1, colour = "black"),
        axis.line.y = element_line(size = 1, colour = "black"),
        panel.border= element_rect(colour = 'black',fill=NA, size=1, linetype='solid'),
        panel.background = element_rect(fill=NA)) +
  scale_colour_grey(start = .7, end = .3,labels = c("primary", "lower secondary","higher secondary","tertiary")) +
  scale_x_discrete(labels = c('Quest.', 'Quest.-Video', 'Video-Quest.','Video')) +
  scale_y_continuous(limits=c(.03, .25),labels = scales::percent)+
  labs(x = "Treatment",y="Predicted Propabilities",colour = "Education",shape  = "Education",title="") 
plot
dev.off()
###################################################################

####################################################################
#Figure 1: Predicted probabilities of interaction effects in Model 2 RESPONSE
#################################################################### EMPLOYMENT STATUS
p<-ggeffect (mylogit4, terms= c("group","unemployed_at_invitation")) 
level_order <- c('survey', 'surveyvideo', 'videosurvey')
level_order2 <- c('employed', 'unemployed')
p$x<-factor(p$x, level = level_order)
p$group<-factor(p$group, level = level_order2)
dataplot<-as.data.frame(p)

tiff('employment_resp.tiff', units="in", width=8, height=5, res=150)
plot<-ggplot(data=dataplot, mapping=aes(y=predicted,x=x, colour=group)) +
  geom_point(position=position_dodge(width=0.35),  size=3) +
  geom_errorbar(aes(ymin=conf.low, ymax=conf.high),position=position_dodge(width=0.35),width=0.15,size=1) +
  theme(axis.ticks.y=element_blank(), 
        axis.title.x = element_text(vjust=-0.5,color="black"), axis.title.y = element_text(vjust=1.5, color="black"), 
        axis.text.x=element_text(size=13, color="black"),axis.text.y=element_text(size=13, color="black"), 
        axis.title=element_text(size=18,face="bold", color="black"), 
        legend.title=element_text(size=12, color="black"), 
        legend.text=element_text(size=12, color="black"),
        legend.key=element_blank(),
        panel.grid.major = element_line(size = 1, linetype = 'solid',colour="lightgrey"),
        panel.grid.minor = element_line(size = 1, linetype = 'solid',colour="lightgrey"),
        axis.line.x = element_line(size = 1, colour = "black"),
        axis.line.y = element_line(size = 1, colour = "black"),
        panel.border= element_rect(colour = 'black',fill=NA, size=1, linetype='solid'),
        panel.background = element_rect(fill=NA)) +
  scale_colour_grey(start = .7, end = .3,labels = c("Employed", "Unemployed")) +
  scale_x_discrete(labels = c('Quest.', 'Quest.-Video', 'Video-Quest.')) +
  scale_y_continuous(limits=c(.00, .20),labels = scales::percent)+
  labs(x = "Treatment",y="Predicted Propabilities",colour = "Employment status",shape  = "Employment status",title="") 
plot
dev.off()
###################################################################

################################################################### GENDER
p<-ggeffect (mylogit4, terms= c("group","sex")) 
level_order <- c('survey', 'surveyvideo', 'videosurvey')
level_order2 <- c('Male', 'Female')
p$x<-factor(p$x, level = level_order)
p$group<-factor(p$group, level = level_order2)
dataplot<-as.data.frame(p)

tiff('gender_resp.tiff', units="in", width=8, height=5, res=150)
plot<-ggplot(data=dataplot, mapping=aes(y=predicted,x=x, colour=group)) +
  geom_point(position=position_dodge(width=0.35),  size=3) +
  geom_errorbar(aes(ymin=conf.low, ymax=conf.high),position=position_dodge(width=0.35),width=0.15,size=1) +
  theme(axis.ticks.y=element_blank(), 
        axis.title.x = element_text(vjust=-0.5,color="black"), axis.title.y = element_text(vjust=1.5, color="black"), 
        axis.text.x=element_text(size=13, color="black"),axis.text.y=element_text(size=13, color="black"), 
        axis.title=element_text(size=18,face="bold", color="black"), 
        legend.title=element_text(size=12, color="black"), 
        legend.text=element_text(size=12, color="black"),
        legend.key=element_blank(),
        panel.grid.major = element_line(size = 1, linetype = 'solid',colour="lightgrey"),
        panel.grid.minor = element_line(size = 1, linetype = 'solid',colour="lightgrey"),
        axis.line.x = element_line(size = 1, colour = "black"),
        axis.line.y = element_line(size = 1, colour = "black"),
        panel.border= element_rect(colour = 'black',fill=NA, size=1, linetype='solid'),
        panel.background = element_rect(fill=NA)) +
  scale_colour_grey(start = .7, end = .3,labels = c("Male", "Female")) +
  scale_x_discrete(labels = c('Quest.', 'Quest.-Video', 'Video-Quest.')) +
  scale_y_continuous(limits=c(.00, .20),labels = scales::percent)+
  labs(x = "Treatment",y="Predicted Propabilities",colour = "Gender",shape  = "Gender",title="") 
plot
dev.off()
###################################################################

################################################################### CITIZENSHIP
p<-ggeffect (mylogit4, terms= c("group","citizen")) 
level_order <- c('survey', 'surveyvideo', 'videosurvey')
level_order2 <- c('Austrian', 'non-Austrian')
p$x<-factor(p$x, level = level_order)
p$group<-factor(p$group, level = level_order2)
dataplot<-as.data.frame(p)

tiff('citizenship_resp.tiff', units="in", width=8, height=5, res=150)
plot<-ggplot(data=dataplot, mapping=aes(y=predicted,x=x, colour=group)) +
  geom_point(position=position_dodge(width=0.35),  size=3) +
  geom_errorbar(aes(ymin=conf.low, ymax=conf.high),position=position_dodge(width=0.35),width=0.15,size=1) +
  theme(axis.ticks.y=element_blank(), 
        axis.title.x = element_text(vjust=-0.5,color="black"), axis.title.y = element_text(vjust=1.5, color="black"), 
        axis.text.x=element_text(size=13, color="black"),axis.text.y=element_text(size=13, color="black"), 
        axis.title=element_text(size=18,face="bold", color="black"), 
        legend.title=element_text(size=12, color="black"), 
        legend.text=element_text(size=12, color="black"),
        legend.key=element_blank(),
        panel.grid.major = element_line(size = 1, linetype = 'solid',colour="lightgrey"),
        panel.grid.minor = element_line(size = 1, linetype = 'solid',colour="lightgrey"),
        axis.line.x = element_line(size = 1, colour = "black"),
        axis.line.y = element_line(size = 1, colour = "black"),
        panel.border= element_rect(colour = 'black',fill=NA, size=1, linetype='solid'),
        panel.background = element_rect(fill=NA)) +
  scale_colour_grey(start = .7, end = .3,labels = c("Austrian", "non-Austrian")) +
  scale_x_discrete(labels = c('Quest.', 'Quest.-Video', 'Video-Quest.')) +
  scale_y_continuous(limits=c(.00, .20),labels = scales::percent)+
  labs(x = "Treatment",y="Predicted Propabilities",colour = "Citizenship",shape  = "Citizenship",title="") 
plot
dev.off()
###################################################################

################################################################### EDUCATION
p<-ggeffect (mylogit4, terms= c("group","education")) 
level_order <- c('survey', 'surveyvideo', 'videosurvey')
level_order2 <- c('primary', 'lower secondary','higher secondary','tertiary')
p$x<-factor(p$x, level = level_order)
p$group<-factor(p$group, level = level_order2)
dataplot<-as.data.frame(p)

tiff('education_resp.tiff', units="in", width=8, height=5, res=150)
plot<-ggplot(data=dataplot, mapping=aes(y=predicted,x=x, colour=group)) +
  geom_point(position=position_dodge(width=0.5),  size=3) +
  geom_errorbar(aes(ymin=conf.low, ymax=conf.high),position=position_dodge(width=0.5),width=0.3,size=1) +
  theme(axis.ticks.y=element_blank(), 
        axis.title.x = element_text(vjust=-0.5,color="black"), axis.title.y = element_text(vjust=1.5, color="black"), 
        axis.text.x=element_text(size=13, color="black"),axis.text.y=element_text(size=13, color="black"), 
        axis.title=element_text(size=18,face="bold", color="black"), 
        legend.title=element_text(size=12, color="black"), 
        legend.text=element_text(size=12, color="black"),
        legend.key=element_blank(),
        panel.grid.major = element_line(size = 1, linetype = 'solid',colour="lightgrey"),
        panel.grid.minor = element_line(size = 1, linetype = 'solid',colour="lightgrey"),
        axis.line.x = element_line(size = 1, colour = "black"),
        axis.line.y = element_line(size = 1, colour = "black"),
        panel.border= element_rect(colour = 'black',fill=NA, size=1, linetype='solid'),
        panel.background = element_rect(fill=NA)) +
  scale_colour_grey(start = .7, end = .3,labels = c("primary", "lower secondary","higher secondary","tertiary")) +
  scale_x_discrete(labels = c('Quest.', 'Quest.-Video', 'Video-Quest.')) +
  scale_y_continuous(limits=c(.00, .25),labels = scales::percent)+
  labs(x = "Treatment",y="Predicted Propabilities",colour = "Education",shape  = "Education",title="") 
plot
dev.off()
###################################################################


###################################################################
# APPENDIX E Share of devices used by those which were sucessfully contacted

appendixE_t<-table(cases_all$group, cases_all$smart)
appendixE<-prop.table(appendixE_t,1)*100
stargazer(as.data.frame.matrix(appendixE),notes =paste("N=",sum(appendixE_t)), type = "html",digits=1, digits.extra=1, summary = FALSE, rownames = TRUE, out="appendix_E.doc")

#Test for differences in mobile device usage
chisqtest_survey_smart1 <- subset(cases_all, group %in% c("survey","video"))
chisq.test(chisqtest_survey_smart1$smart, chisqtest_survey_smart1$group)

chisqtest_survey_smart2 <- subset(cases_all, group %in% c("surveyvideo","videosurvey"))
chisq.test(chisqtest_survey_smart2$smart, chisqtest_survey_smart2$group)
